import time

import requests
import re
import csv

# Module-level CSV sink used by the scraper below: the output file is opened
# (and truncated) once at import time, and the column header is emitted
# before any data rows are appended.
file = open("yyjj.csv", mode='wt', encoding='utf-8', newline='')
writer = csv.writer(file)
writer.writerow(['price', 'location', 'sales', 'title'])

# Extractors for the `"key":"value"` pairs found in the page text; compiled
# once because they are reused for every page.
# The price pattern escapes the decimal point and makes the fraction optional,
# so integer prices of any length are captured and arbitrary characters are
# no longer accepted in place of the dot.
_PRICE_RE = re.compile(r'"view_price":"(\d+(?:\.\d+)?)"')
_LOCATION_RE = re.compile(r'"item_loc":"(.*?)"')
_SALES_RE = re.compile(r'"view_sales":"(.*?)"')
_TITLE_RE = re.compile(r'"raw_title":"(.*?)"')
# First number in a sales label, optionally followed by the 万 (x10,000) unit.
_SALES_NUMBER_RE = re.compile(r'(\d+(?:\.\d+)?)(万?)')


def _parse_sales(sales):
    """Return the purchase count in a sales label as a plain digit string.

    '万' means ten thousand, so '1.5万+人付款' becomes '15000' and
    '2万+人付款' becomes '20000'; '500+人付款' becomes '500'.
    A label that contains no digits yields '' instead of raising.
    """
    match = _SALES_NUMBER_RE.search(sales)
    if match is None:
        return ''
    number, unit = match.groups()
    whole, _, fraction = number.partition('.')
    if unit:
        # Shift the decimal point four places on the digit string itself so
        # no float rounding can creep in (e.g. '1.25' -> '12500').
        whole += fraction[:4].ljust(4, '0')
    return str(int(whole))


def getHTMLText(pages=100, delay=20):  # fetch search pages and save rows
    """Scrape Taobao search results and append them to the module CSV writer.

    The search keyword is fixed in the URL (URL-encoded '医用酒精'). For each
    page the price, location, sales count and title of every listing are
    extracted with regular expressions and written as one CSV row through the
    module-level ``writer``.

    Args:
        pages: Number of result pages to request (default 100). Page ``k``
            is requested with offset ``s = 44 * k``.
        delay: Seconds to wait between two consecutive requests (default 20).

    Returns:
        None. Rows are written to ``writer`` and progress is printed.

    Raises:
        requests.RequestException: if a request fails or exceeds the
            30-second timeout.
    """
    # NOTE(security): this is a hard-coded logged-in session cookie. It will
    # expire and should not live in source control; consider loading it from
    # the environment or a local, untracked config file.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36',
        'cookie': 'cna=8lgEF17vIBYCAXFvLkYxIUT9; miid=477484191232388817; thw=cn; lgc=%5Cu8BA9%5Cu98CE%5Cu8DDF%5Cu968F%5Cu7740; tracknick=%5Cu8BA9%5Cu98CE%5Cu8DDF%5Cu968F%5Cu7740; tg=0; enc=ky0PIaSKS%2FSfUj9tuJH6%2FdUib8NHncPJlmmP9RgVKUkEB17ydxRZfG8veq6VO0I6uL0czJ5r8lWSiroULiIGPw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; t=9743c2bb763ef9b41a3c970a66ae6857; v=0; cookie2=15aca9a268aa34c00d262bb18e632eaa; _tb_token_=b9e7e3b7f5e; _samesite_flag_=true; dnk=%5Cu8BA9%5Cu98CE%5Cu8DDF%5Cu968F%5Cu7740; _uab_collina=158700503671627316007266; sgcookie=ErdwzwucDRoqB%2BWDCzWxM; unb=2952095418; uc3=nk2=q5qS1U9rK4W0PA%3D%3D&id2=UUGnyIRC73%2BoUQ%3D%3D&vt3=F8dBxdGKMHrXhNLZMZo%3D&lg2=UtASsssmOIJ0bQ%3D%3D; csg=8fc0e2fb; cookie17=UUGnyIRC73%2BoUQ%3D%3D; skt=e53c27fc4549fd2a; existShop=MTU4NzAyNjk3NA%3D%3D; uc4=nk4=0%40qS%2B580itwv0XNfE%2Bsikj9VmSIBZ2&id4=0%40U2OQ30fJan95UwChJyT3gmv%2Fn5UD; _cc_=V32FPkk%2Fhw%3D%3D; _l_g_=Ug%3D%3D; sg=%E7%9D%808a; _nk_=%5Cu8BA9%5Cu98CE%5Cu8DDF%5Cu968F%5Cu7740; cookie1=Vqgmjy16LWGLFy%2FY75wDTHCRSMVDV4okq%2BTB52XArKc%3D; tfstk=c43AB9ZjcLv07PG4YVKu5lylyXAhZWpTFswGWC_zAM_kTkbOiCgnJDZHc56YyQC..; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; _m_h5_tk=dda92e7608b26b70c474437bc4b54da5_1587038461058; _m_h5_tk_enc=ab5dba1552b395515ef3d6bc9ea0ec72; mt=ci=21_1; uc1=cookie16=Vq8l%2BKCLySLZMFWHxqs8fwqnEw%3D%3D&cookie21=UIHiLt3xTIkz&cookie15=URm48syIIVrSKA%3D%3D&existShop=false&pas=0&cookie14=UoTUPczxhx9Ebw%3D%3D&cart_m=0; JSESSIONID=AB1E2892ED9162CCEF9FB63EF7126CE2; l=eBL2IiJqQKUVRjCtBOfahF-NDWbTjIRAguJw_ysyiT5PsDfp56vFWZX1UGY9CnGVh6-HR35mgkfMBeYBqS24n5U62j-la6Mmn; isg=BKKiGPuXCGSHQRSj1CVPkRRS8ygE86YNpnbWWew7xZXAv0I51IFWHYK96_tDrx6l',
    }
    for page_index in range(pages):
        if page_index:
            # Throttle between requests only; no pointless wait after the
            # final page.
            time.sleep(delay)

        # The `s` query parameter is the result offset; it advances by 44
        # per page (presumably the number of listings per page).
        offset = 44 * page_index
        start_url = 'https://s.taobao.com/search?q=%E5%8C%BB%E7%94%A8%E9%85%92%E7%B2%BE&s={}'.format(offset)
        print(start_url)
        r = requests.get(start_url, headers=header, timeout=30)
        html = r.text

        plts = _PRICE_RE.findall(html)
        llts = _LOCATION_RE.findall(html)
        slts = _SALES_RE.findall(html)
        tlts = _TITLE_RE.findall(html)

        # NOTE: zip pairs the four lists by position and stops at the
        # shortest one, so a listing that lacks one of the fields would
        # shift the rows that follow it.
        for plt, llt, slt, tlt in zip(plts, llts, slts, tlts):
            price = plt.strip()
            # Keep only the part before the first space of the location.
            location = llt.split(' ')[0]
            moneys = _parse_sales(slt.strip())
            title = tlt.strip()

            print(location)
            print(moneys)

            writer.writerow((price, location, moneys, title))
            print("保存成功")


if __name__ == '__main__':
    try:
        getHTMLText()
    finally:
        # The CSV handle is opened at module level and never closed
        # elsewhere; close it here so buffered rows reach disk even when
        # the scrape aborts midway (timeout, network error, Ctrl-C).
        file.close()
